#!/bin/bash

# Crawl all 10 pages of quotes.toscrape.com,
# starting from the first page.

# URL of the page to fetch next; starts at page 1 and is overwritten by the
# crawl loop with each following page's URL.
url="http://quotes.toscrape.com/page/1/"

#######################################
# Download one listing page and report the page that follows it.
# The HTML is saved in the current directory as quotes-<N>.html, where <N>
# is the last path component of the URL (a trailing slash is optional).
# Arguments: $1 - absolute URL of the page to download
# Outputs:   the absolute URL of the next page on stdout, or an empty line
#            when the page contains no "Next" link; diagnostics on stderr
# Returns:   0 on success, non-zero if the download failed
#######################################
fetch() {
    local url=$1 filename num next

    # Last path component, whether or not the URL ends with a slash.
    num=${url%/}
    num=${num##*/}
    filename="quotes-$num.html"

    # -f turns HTTP errors (4xx/5xx) into a failure instead of saving the
    # error page; -S keeps curl's error message visible despite -s.
    if ! curl -fsS "$url" > "$filename"; then
        # Do not leave an empty or truncated page behind.
        rm -f -- "$filename"
        echo "fetch: failed to download $url" >&2
        return 1
    fi

    # Isolate just the matched anchor (-o) before extracting the href, so
    # other links on the same line cannot be picked up by mistake; keep only
    # the first match in case the pager is rendered more than once.
    next=$(grep -oE '<a href="/page/[0-9]+/">Next' "$filename" \
            | head -n 1 \
            | sed -E 's/^<a href="([^"]+)".*$/\1/')

    # The href is site-relative; turn it into an absolute URL.
    if [[ -n "$next" ]]; then
        next="http://quotes.toscrape.com$next"
    fi
    echo "$next"
}

# Crawl loop: fetch the current page, then move on to the URL that fetch
# printed. An empty result means there is nothing further to fetch.
while :; do
    echo "fetching $url"
    url=$(fetch "$url")
    # Still have a next page: go around again.
    [[ -n "$url" ]] && continue
    echo "all done"
    break
done
